library(Seurat)
library(tidyseurat)
library(clusterProfiler)
library(tidyverse)
library(ggpubr)
library(patchwork)
source('00_util_scripts/mod_seurat.R')
source('00_util_scripts/mod_bplot.R')

# mast cell TNF/VEGFA ----------
sobj_sg <- read_rds('CRC-I/data/seekgene/crc-starsolo-annotated.rds')

sobj_sg$genotype |> table()

sobj.sg.mast <- sobj_sg |>
  filter(zhang2020_fine == 'Mast-TPSAB1')

sobj.sg.mast |>
  FindMarkers(features = c('VEGFA','TNF'),
              group.by = 'genotype', ident.1 = 'TT')

sobj.sg.mast |>
  VlnPlot(features = c('VEGFA','TNF'),
          group.by = 'genotype')

logtpm.mean <- function(x)log1p(mean(expm1(x)))

sobj.sg.mast <- sobj.sg.mast |>
  get_abundance_sc_wide(features = c('VEGFA','TNF')) |>
  left_join(sobj.sg.mast, y = _)

g.vegfa <- sobj.sg.mast |>
  ggplot(aes(genotype, VEGFA, fill = genotype)) +
  geom_violin() +
  stat_summary(geom = 'crossbar', width = .3, fun = 'logtpm.mean') +
  theme_pubr(legend = 'none')

g.tnf <- sobj.sg.mast |>
  ggplot(aes(genotype, TNF, fill = genotype)) +
  geom_violin() +
  stat_summary(geom = 'crossbar', width = .3, fun = 'logtpm.mean') +
  theme_pubr(legend = 'none')

g.tnf + g.vegfa + plot_annotation(title = 'Mast cell in CRC TME')

sobj.sg.mast <- sobj.sg.mast |>
  quick_process_seurat(skip_norm = T)

sobj.sg.mast |>
  DimPlot(group.by = 'seurat_clusters')

sobj.sg.mast |>
  DotPlot(features = c('rna_VEGFA','rna_TNF'))

sobj.sg.mast |>
  FindAllMarkers(features = c('VEGFA','TNF'), only.pos = T)

sobj.sg.mast <- sobj.sg.mast |>
  mutate(vegfa.mast = ifelse(seurat_clusters %in% c(1,5,7), 'VEGFA+', 'VEGFA-'))

sobj.sg.mast |>
  test_on_grouped_count(group = genotype, subtype = vegfa.mast)

sobj.sg.mast |>
  calc_frac_conf_on_grouped_count(group = genotype, subtype = vegfa.mast) |>
  filter(vegfa.mast == 'VEGFA+') |>
  ggplot(aes(genotype, fraction, fill = genotype)) +
  geom_col() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = .3) +
  theme_pubr(legend = 'right') +
  labs(title = 'Fraction of VEGFA+ mast cell in CRC TME mast cells')

sobj.sg.mast@meta.data |>
discov_frac_change(group = genotype, subtype = seurat_clusters,
                   var.1 = TT, var.2 = IT) |>
  ggplot(aes(log2fc_frac, subtype, fill = type)) +
  geom_col() +
  scale_fill_manual(values = c('blue','red','grey')) +
  theme_pubr()

sobj.sg.mast$latent_cluster |> table()

sobj.sg.mast |>
  annotate_latents(big_class = 'Mast cell')

## DEGA -----
sobj.sg.mast.tvi <- sobj.sg.mast |>
  FindMarkers(ident.1 = 'TT', group.by = 'genotype') |>
  as_tibble(rownames = 'gene')

sobj.sg.mast.tvi |>
  filter(!str_detect(gene, '^ENSG0')) |>
  plot_bill_volc(group1 = 'TT', group2 = 'IT')

sobj.sg.mast.tvi |>
  write_csv('CRC-I/results/seek3.mast.ttvit.deg.csv')

## GSEA -----
sg.mast.tvi.glist <- sobj.sg.mast.tvi |>
  filter(p_val_adj < .05) |>
  pull(avg_log2FC, name = gene) |>
  sort(decreasing = T)

mast.tvi.gsego <- sg.mast.tvi.glist |>
  gseGO(OrgDb = 'org.Hs.eg.db', keyType = "SYMBOL", ont = 'ALL')

mast.tvi.gsego |>
  dotplot()

## ORA ---------
mast.tvi.upgo <- sobj.sg.mast.tvi |>
  filter(p_val_adj < .05, avg_log2FC > 1) |>
  pull(gene) |>
  enrichGO(OrgDb = 'org.Hs.eg.db', keyType = "SYMBOL", readable = T,
           ont = 'ALL')

mast.tvi.upgo |>
  dotplot()

mast.tvi.upgo@result |>
  DT::datatable()

sobj.sg.mast |>
  ggplot(aes(UMAP_1, UMAP_2, color = seurat_clusters %in% c(3,6,7))) +
  geom_point()

sobj.sg.mast@reductions$umap |> glimpse()
